package com.spiden.bazaspiden.task;

import com.spiden.bazaspiden.pipeline.FarmerPipeline;
import com.spiden.bazaspiden.processor.FarmerPageProcessor;
import com.spiden.bazaspiden.utils.AppConfig;
import com.spiden.bazaspiden.webmagic.selenium.NewSeleniumDownloader;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Spider;

import javax.annotation.Resource;

/**
 * Scheduled crawler tasks.
 *
 * <p>The only job declared in this class is currently commented out, so this component
 * does not schedule anything at the moment.
 *
 * @author 张高昌
 * @date 2024/1/4 19:45
 */
@Component
public class NewsScheduled {

    /**
     * Pipeline handed to the spider. The Pipeline implementation class must be managed by
     * Spring so that it can be injected here.
     */
    @Resource
    private FarmerPipeline farmerPipeline;

    /**
     * Start URL for the crawl. Hard-coded here for now; the plan is to crawl several
     * websites later. It is never reassigned, hence {@code final}.
     */
    private final String pathUrl = "https://www.farmer.com.cn/farmer/xw/sntt/list.shtml";

    // Disabled job, kept for reference. When enabled, the cron expression below fires
    // once every 10 minutes.
    // NOTE(review): AppConfig.getChromeDriverPath is written without parentheses; confirm
    // whether it is a static field or a method before re-enabling this job.
//    @Scheduled(cron = "0 0/10 * * * ? ")
//    public void testFarmer() {
//        Spider spider = Spider.create(new FarmerPageProcessor());
//        spider.addUrl(pathUrl)
//                .setDownloader(new NewSeleniumDownloader(AppConfig.getChromeDriverPath)
//                        .setSleepTime(2000))
//                // Crawl with 5 threads, which means the browser opens 5 windows working at the same time
//                .thread(5)
//                .addPipeline(farmerPipeline)
//                .run();
////        spider.setExitWhenComplete(true);
////        spider.start();
////        spider.stop();
//    }

}
